#### IPUMS Project
#### Guatemala test script

#### Load packages
# NOTE: the `rm(list = ls())` call that used to open this script was removed.
# It only deletes objects from the global environment (it does not detach
# packages or reset options), so it does not give a clean session, and it
# wipes the workspace of anyone who sources this file. Restart the R session
# to start from a clean state instead.

# Install each package only when it is missing, so running the script does
# not re-download it on every execution.
if (!requireNamespace("ipumsr", quietly = TRUE)) {
  install.packages("ipumsr")
}
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(ipumsr)
library(data.table)

#### Loading data
# Read the IPUMS microdata extract through its DDI (.xml) file. The path is
# absolute and machine-specific; edit it before running on another computer.
# Previous location, kept for reference:
# gtm2002_a<- read_ipums_micro("C:/Users/jpinchoff/Dropbox/Pop Council/Urbanization agenda/Adol on the Move/Guate/IPUMS/ipumsi_00003.xml")
gtm2002_a <- read_ipums_micro(
  ddi = "C:/Chabeli_Files/ipumsi_00003.xml"
)

#### Checking data
# Class of the object exactly as returned by the reader.
class(gtm2002_a)

#### Convert and save file
# Convert to a data.table by reference (no copy is made), then confirm the
# class changed.
data.table::setDT(gtm2002_a)
class(gtm2002_a)

# Write the object to an .RData file; the relative path resolves against the
# current working directory.
save(list = "gtm2002_a", file = "GTM_2002_a.RData")
#write.csv(gtm2002_a, file = "G:/Documentos/IPUMS PERCC/GTM_2002_a.csv")

#### Load data
#gtm2<-load("G:\\Documentos\\IPUMS PERCC\\GTM_2002.RData")



### Explore the database
# Column names, sorted alphabetically.
ls(gtm2002_a)

# First rows of the data.
head(x = gtm2002_a)

# Unweighted frequency counts, one table per variable of interest.
# Demographics
table(gtm2002_a[["SEX"]])
table(gtm2002_a[["AGE"]])
table(gtm2002_a[["AGE2"]])
table(gtm2002_a[["MARST"]])
# Fertility
table(gtm2002_a[["CHBORN"]])
table(gtm2002_a[["LASTBYR"]])
table(gtm2002_a[["LASTBMO"]])
# Schooling, literacy, labour force, ethnicity
table(gtm2002_a[["SCHOOL"]])
table(gtm2002_a[["LIT"]])
table(gtm2002_a[["LABFORCE"]])
table(gtm2002_a[["INDIG"]])
# Migration
table(gtm2002_a[["MIGRATE5"]])
table(gtm2002_a[["GEOMIG1_5"]])
table(gtm2002_a[["MIG1_5_GT"]])
table(gtm2002_a[["MIGGT2"]])
# Education detail, employment, occupation, urban/rural
table(gtm2002_a[["YRSCHOOL"]])
table(gtm2002_a[["LEFTSCH"]])
table(gtm2002_a[["EMPSTAT"]])
table(gtm2002_a[["EMPSTATD"]])
table(gtm2002_a[["OCCISCO"]])
table(gtm2002_a[["URBAN"]])

#### Duplicate and rename variables to use
# Copy each upper-case IPUMS variable into a lower-case column of the same
# name; the upper-case originals are left in place.
#
# Fix: URBAN was never copied (PERWT was copied twice instead), although the
# lower-case `urban` column is selected later in this script. URBAN is now
# part of the list, and driving the copies from one vector avoids that kind
# of copy-paste slip.
ipums_vars <- c(
  "SEX", "AGE", "AGE2", "MARST", "CHBORN", "LASTBYR", "LASTBMO", "SCHOOL",
  "LIT", "LABFORCE", "INDIG", "MIGRATE5", "GEOMIG1_5", "MIG1_5_GT", "MIGGT2",
  "YRSCHOOL", "LEFTSCH", "EMPSTAT", "EMPSTATD", "OCCISCO", "RELATED",
  "COUNTRY", "YEAR", "SAMPLE", "SERIAL", "HHWT", "PERNUM", "PERWT", "URBAN"
)

# Report variables that are not in the extract instead of skipping them
# silently (assigning a missing column creates nothing).
absent_vars <- setdiff(ipums_vars, names(gtm2002_a))
if (length(absent_vars) > 0) {
  warning(
    "Variables not found in the extract and therefore not copied: ",
    paste(absent_vars, collapse = ", "),
    call. = FALSE
  )
}

# intersect() keeps the order of `ipums_vars`, so the new columns are added
# in the order listed above.
for (ipums_var in intersect(ipums_vars, names(gtm2002_a))) {
  gtm2002_a[[tolower(ipums_var)]] <- gtm2002_a[[ipums_var]]
}

### Basic tabulates and descriptive analysis

### Selecting variables to use
# Install tidyverse only when it is missing, so the script does not
# re-download it on every run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Keep only the lower-case analysis columns, in the order listed. Every name
# here must already exist as a column of gtm2002_a, otherwise select() stops
# with an error.
gtm_02 <- gtm2002_a %>%
  select(
    # demographics and fertility
    sex, age, age2, marst, chborn, lastbyr, lastbmo,
    # schooling, labour force, ethnicity
    school, labforce, indig,
    # migration
    migrate5, geomig1_5, mig1_5_gt, miggt2,
    # education detail, employment, occupation
    yrschool, lit, leftsch, empstat, empstatd, occisco,
    # household relationship and sample identifiers
    related, country, year, urban, sample, serial,
    # household and person identifiers / weights
    hhwt, pernum, perwt
  )

#### survey design
# Install survey only when it is missing, so the script does not re-download
# it on every run.
if (!requireNamespace("survey", quietly = TRUE)) {
  install.packages("survey")
}
library(survey)

# One identifier per row, used as the sampling-unit id below.
# seq_len() is used instead of 1:nrow() so an empty table yields an empty id
# vector rather than c(1, 0).
gtm_02$id <- seq_len(nrow(gtm_02))

# Design with one unit per person record, weighted by the person weight.
# The argument is spelled `ids` in full (the original `id` only worked
# through partial argument matching). No strata are supplied.
dsgiph <- svydesign(
  ids     = ~id,
  weights = ~perwt,
  nest    = TRUE,
  data    = gtm_02
)
#### test of svydesign
dsgiph
# Name the column in the formula so it is taken from the design object itself
# rather than from whatever `gtm_02` happens to be in the global environment.
svymean(~age, dsgiph)

#### Basic tabulates and descriptive of the sample

#### Individual age and sex
# Weighted cross-tabulation of single years of age by sex.
svytable(formula = ~ age + sex, design = dsgiph)

#### Age groups and sex
# Same cross-tabulation using the grouped age variable.
svytable(formula = ~ age2 + sex, design = dsgiph)

### subset females 10+
# Row counts are printed before and after each filter to check the subsets.
nrow(gtm_02)
# Keep records with sex == 2.
# NOTE(review): assumes code 2 is "female" in the IPUMS SEX coding -- confirm
# against the codebook.
gtfem02 <- filter(gtm_02, sex == 2)
nrow(gtfem02)
# Keep records with age2 >= 3.
# NOTE(review): assumes AGE2 code 3 is the first group aged 10 or older; any
# higher code (including an "unknown" code, if one exists) is kept too --
# confirm against the codebook. The original author marked this step with
# "No run?", so verify that it executes as intended.
gtfm02 <- filter(gtfem02, age2 >= 3)
nrow(gtfm02)

#### subset design
# `id` is carried over from gtm_02, so the ids are still unique but no longer
# consecutive. The argument is spelled `ids` in full (the original `id` only
# worked through partial argument matching).
dsgiphfem <- svydesign(
  ids     = ~id,
  weights = ~perwt,
  nest    = TRUE,
  data    = gtfm02
)
#### test of svydesign
dsgiphfem
# Name the column in the formula so it is taken from the design object itself
# rather than from the global `gtfm02`.
svymean(~age, dsgiphfem)


#### Basic analysis
# NOTE(review): every table below uses the full-population design `dsgiph`,
# not the female 10+ design `dsgiphfem` -- confirm this is intended.

###### Age
svytable(formula = ~ age2, design = dsgiph)


#### Marital status
svytable(formula = ~ age2 + marst, design = dsgiph)

###### Children ever born
svytable(formula = ~ age2 + chborn, design = dsgiph)

#### Education
###### Literacy
svytable(formula = ~ age2 + lit, design = dsgiph)

###### School
svytable(formula = ~ age2 + school, design = dsgiph)

###### Years of schooling
svytable(formula = ~ age2 + yrschool, design = dsgiph)

###### Left school
svytable(formula = ~ age2 + leftsch, design = dsgiph)

#### Work
svytable(formula = ~ age2 + labforce, design = dsgiph)

###### Labour force and left school
svytable(formula = ~ labforce + leftsch, design = dsgiph)

#### Migration
svytable(formula = ~ age2 + migrate5, design = dsgiph)

###### Geographic location 5 years ago
######## Department of residence 5 years ago, Guatemala; consistent boundaries, GIS
svytable(formula = ~ age2 + mig1_5_gt, design = dsgiph)

######## Municipality of residence 5 years ago, Guatemala
svytable(formula = ~ age2 + miggt2, design = dsgiph)













